package com.chb.weibo1tmp;

import java.io.IOException;
import java.io.StringReader;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

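/**
 * Mapper for the first pass over the weibo data. Each input line is a weibo
 * record ("id" TAB "content"); the content is tokenized with the IK Chinese
 * analyzer and every token is emitted as (word_weiboId, 1), plus one
 * ("count", 1) per record so the total number of posts can be recovered
 * downstream (presumably for a TF-IDF style computation).
 */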
public class FirstMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// Each input line is one weibo record: "id \t content"
		String[] fields = value.toString().split("\t");
		if (fields.length < 2) {
			System.out.println("no content in this line, skipping");
			return;
		}
		String id = fields[0];
		String content = fields[1];

		// Tokenizer: split the content into words with the IK analyzer (smart mode)
		StringReader sr = new StringReader(content);
		IKSegmenter ikSegmenter = new IKSegmenter(sr, true);
		Lexeme word = null;

		while ((word = ikSegmenter.next()) != null) {
			String w = word.getLexemeText();
			// Count term frequency: emit (word_weiboId, 1) for each token
			context.write(new Text(w + "_" + id), new IntWritable(1));
		}
		// Emit one ("count", 1) per record to tally the total number of weibo posts
		context.write(new Text("count"), new IntWritable(1));
	}
}
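
/*
 * Minimal driver sketch showing how this mapper could be wired into a Job.
 * This is an illustration, not part of the original project: the reducer class
 * name FirstReducer and the job name are assumptions; it only presumes a
 * reducer that sums the IntWritable 1s per key. FileInputFormat/FileOutputFormat
 * refer to the org.apache.hadoop.mapreduce.lib.input / lib.output classes.
 *
 *   Configuration conf = new Configuration();
 *   Job job = Job.getInstance(conf, "weibo-first-pass");   // job name is arbitrary
 *   job.setJarByClass(FirstMapper.class);
 *   job.setMapperClass(FirstMapper.class);
 *   job.setReducerClass(FirstReducer.class);               // hypothetical summing reducer
 *   job.setOutputKeyClass(Text.class);
 *   job.setOutputValueClass(IntWritable.class);
 *   FileInputFormat.addInputPath(job, new Path(args[0]));
 *   FileOutputFormat.setOutputPath(job, new Path(args[1]));
 *   System.exit(job.waitForCompletion(true) ? 0 : 1);
 */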
